/*
 *  This file is part of JBIRCH.
 *
 *  JBIRCH is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  JBIRCH is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with JBIRCH.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

/*
 *  CFTree.java
 *  Copyright (C) 2009 Roberto Perdisci (roberto.perdisci@gmail.com)
 */


package ru.algorithm.clusterization.cftree3;


import net.sourceforge.sizeof.SizeOf;
import ru.common.Pair;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * This is an implementation of the BIRCH clustering algorithm described in:
 *
 * T. Zhang, R. Ramakrishnan, and M. Livny.
 * "BIRCH: A New Data Clustering Algorithm and Its Applications"
 * Data Mining and Knowledge Discovery, 1997.
 *
 * @author Roberto Perdisci (roberto.perdisci@gmail.com)
 * @version 0.1
 *
 */
public class CFTree {

    public static final double EPS = 1e-6;

	/**
	 * Used when computing if the tree is reaching memory limit
	 */
	private static final double MEM_LIM_FRAC = 10;

	/**
	 * Centroid Distance D0
	 */
	public static final int D0_DIST = 0;

	/**
	 * Centroid distance D1
	 */
	public static final int D1_DIST = 1;

	/**
	 * Cluster Distance D2
	 */
	public static final int D2_DIST = 2;

	/**
	 * Cluster Distance D3
	 */
	public static final int D3_DIST = 3;

	/**
	 * Cluster Distance D4
	 */
	public static final int D4_DIST = 4;

	/**
	 * The root node of the CFTree
	 */
	private CFNode root;

	/**
	 * dummy node that points to the list of leaves. used for fast retrieval of final subclusters
	 */
	private CFNode leafListStart = null;

	/**
	 * keeps count of the instances inserted into the tree
	 */
	private int instanceIndex = 0;


	/**
	 * the memory limit used when automatic rebuilding is active
	 */
	private long memLimit = (long)Math.pow(1024, 3); // default = 1GB

	/**
	 * used when automatic rebuilding is active
	 */
    // TODO : 
	private long periodicMemLimitCheck = 1000; // checks if memeory limit is exceeded every 100,000 insertions

    private Scorer scorer;

    private double PERCENT_DELETED = 0.3;

	/**
	 *
	 * @param maxNodeEntries parameter B
	 * @param distThreshold parameter T
	 * @param distFunction must be one of CFTree.D0_DIST,...,CFTree.D4_DIST, otherwise it will default to D0_DIST
	 * @param applyMergingRefinement if true, activates merging refinement after each node split
	 */
	public CFTree(int maxNodeEntries, double distThreshold, int distFunction, boolean applyMergingRefinement, Scorer scorer) {
		if(distFunction < D0_DIST || distFunction > D4_DIST)
			distFunction = D0_DIST;

		root = new CFNode(maxNodeEntries,distThreshold,distFunction,applyMergingRefinement,true);
		leafListStart = new CFNode(0,0,distFunction,applyMergingRefinement,true); // this is a dummy node that points to the fist leaf
		leafListStart.setNextLeaf(root); // at this point root is the only node and therefore also the only leaf
        this.scorer = scorer;
	}

	/**
	 *
	 * @return the current memory limit used to trigger automatic rebuilding
	 */
	public long getMemoryLimit() {
		return memLimit;
	}

	/**
	 * Gets the start of the list of leaf nodes (remember: the first node is a dummy node)
	 *
	 * @return
	 */
	public CFNode getLeafListStart() {
		return this.leafListStart;
	}

	/**
	 *
	 * @param limit memory limit in bytes
	 */
	public void setMemoryLimit(long limit) {
		this.memLimit = limit;
	}

	/**
	 *
	 * @param limit memory limit in Mbytes
	 */
	public void setMemoryLimitMB(long limit) {
		this.memLimit = limit*1024*1024;
	}

	/**
	 *
	 * @param period the number of insert operations after which we check whether the tree has reached the memory limit
	 */
	public void setPeriodicMemLimitCheck(long period) {
		this.periodicMemLimitCheck = period;
	}

	/**
	 * Inserts a single pattern vector into the CFTree
	 *
	 * @param x the pattern vector to be inserted in the tree
	 * @return true if insertion was successful
	 */
	public boolean insertEntry(double[] x) {
		instanceIndex++;
        if(instanceIndex % periodicMemLimitCheck == 0) {
			rebuildIfAboveMemLimit();
		}
		return insertEntry(new CFLeaf(x, instanceIndex));
		
	}

	/**
	 * Inserts an entire CFEntry into the tree. Used for tree rebuilding.
	 *
	 * @param e the CFEntry to insert
	 * @return true if insertion happened without problems
	 */
	private boolean insertEntry(CFEntry e) {
		boolean dontSplit = root.insertEntry(e);
		if(!dontSplit) {
			splitRoot();
		}
		return true; // after root is split, we are sure x was inserted correctly in the tree, and we return true
	}

	/**
	 * Every time we split the root, we check whether the memory limit imposed on the tree
	 * has been reached. In this case, we automatically increase the distance threshold and
	 * rebuild the tree.
	 *
	 * It is worth noting that since we only check memory consumption only during root split,
	 * and not for all node splits (for performance reasons), we cannot guarantee that
	 * the memory limit will not be exceeded. The tree may grow significantly between a
	 * root split and the next.
	 * Furthermore, the computation of memory consumption using the SizeOf class is only approximate.
	 *
	 * Notice also that if the threshold grows to the point that all the entries fall into one entry
	 * of the root (i.e., the root is the only node in the tree, and has only one sub-cluster)
	 * the automatic rebuild cannot decrease the memory consumption (because increasing the threshold
	 * has not effect on reducing the size of the tree), and if Java runs out of memory
	 * the program will terminate.
	 *
	 * @return true if rebuilt
	 */
	private boolean rebuildIfAboveMemLimit() {
        boolean wasRebuilt = false;
//        System.out.println("Check rebuilding");
		while (hasReachedMemoryLimit(this,memLimit)) {
//            System.out.println("REBUILDING...");
            removeUnimportantClusters();
            cleanTree();
            collapseTree();
			wasRebuilt = true;
            System.gc();
		}
		return wasRebuilt;
	}

    private void removeUnimportantClusters() {
        List<CFEntry> clusters = getClusters();
        List<CFEntry> clustersToDelete = new ArrayList<CFEntry>();

        int toDelete = Math.min(clusters.size(), (int) (PERCENT_DELETED * clusters.size()));

//        System.out.println("to delete : " + toDelete);

        while (toDelete > 0 && clusters.size() > 0) {
            double border = scorer.score(clusters.get(clusters.size() / 2));
            List<CFEntry> left = new ArrayList<CFEntry>();
            List<CFEntry> right = new ArrayList<CFEntry>();
            List<CFEntry> middle = new ArrayList<CFEntry>();

            for (CFEntry entry : clusters) {
                double score = scorer.score(entry);
                if (score > border + EPS) {
                    right.add(entry);
                } else {
                    if (Math.abs(score - border) <= EPS) {
                        middle.add(entry);
                    } else {
                        left.add(entry);
                    }
                }
            }

            if (left.size() <= toDelete) {
                clustersToDelete.addAll(left);
                toDelete -= left.size();

                if (middle.size() <= toDelete) {
                    clustersToDelete.addAll(middle);
                    toDelete -= middle.size();
                    clusters = right;
                } else {
                    clustersToDelete.addAll(middle.subList(0, toDelete));
                    toDelete = 0;
                }
            } else {
                clusters = left;
            }
        }

//        System.out.println("Clusters to remove count: " + clustersToDelete.size());
        for (CFEntry entry : clustersToDelete) {
            entry.setToDelete(true);
        }
    }

    private void cleanTree() {
        root.clean();
    }

    private void collapseTree() {
        root.collapse();
    }

    /**
	 * Splits the root to accommodate a new entry. The height of the tree grows by one.
	 */
	private void splitRoot() {
		// the split happens by finding the two entries in this node that are the most far apart
		// we then use these two entries as a "pivot" to redistribute the old entries into two new nodes

		CFEntryPair p = root.findFarthestEntryPair(root.getEntries());

		CFEntry newEntry1 = new CFEntry();
		CFNode newNode1 = new CFNode(root.getMaxNodeEntries(),root.getDistThreshold(),root.getDistFunction(),root.applyMergingRefinement(),root.isLeaf());
		newEntry1.setChild(newNode1);

		CFEntry newEntry2 = new CFEntry();
		CFNode newNode2 = new CFNode(root.getMaxNodeEntries(),root.getDistThreshold(),root.getDistFunction(),root.applyMergingRefinement(),root.isLeaf());
		newEntry2.setChild(newNode2);

		// the new root that hosts the new entries
		CFNode newRoot = new CFNode(root.getMaxNodeEntries(),root.getDistThreshold(),root.getDistFunction(),root.applyMergingRefinement(),false);
		newRoot.addToEntryList(newEntry1);
		newRoot.addToEntryList(newEntry2);

		// this updates the pointers to the list of leaves
		if(root.isLeaf()) { // if root was a leaf
			leafListStart.setNextLeaf(newNode1);
			newNode1.setPreviousLeaf(leafListStart);
			newNode1.setNextLeaf(newNode2);
			newNode2.setPreviousLeaf(newNode1);
		}

		// redistributes the entries in the root between newEntry1 and newEntry2
		// according to the distance to p.e1 and p.e2
		root.redistributeEntries(root.getEntries(),p,newEntry1,newEntry2);

		// updates the root
		root = newRoot;

		// frees some memory by deleting the nodes in the tree that had to be split
		System.gc();

	}

	/**
	 * Overwrites the structure of this tree (all nodes, entreis, and leaf list) with the structure of newTree.
	 *
	 * @param newTree the tree to be copied
	 */
	private void copyTree(CFTree newTree) {
		this.root = newTree.root;
		this.leafListStart = newTree.leafListStart;
	}

	/**
	 * Computes a new threshold based on the average distance of the closest subclusters in each leaf node
	 *
	 * @param leafListStart the pointer to the start of the list (the first node is assumed to be a place-holder dummy node)
	 * @param distFunction
	 * @param currentThreshold
	 * @return the new threshold
	 */
	public double computeNewThreshold(CFNode leafListStart,int distFunction, double currentThreshold) {
		double avgDist = 0;
		int n = 0;

		CFNode l = leafListStart.getNextLeaf();
		while(l!=null) {
			if(!l.isDummy()) {
				CFEntryPair p = l.findClosestEntryPair(l.getEntries());
				if(p!=null) {
					avgDist += p.e1.distance(p.e2, distFunction);
					n++;

					/* This is a possible alternative: Overall avg distance between leaf entries
					CFEntry[] v = l.getEntries().toArray(new CFEntry[0]);
					for(int i=0; i < v.length-1; i++) {
						for(int j=i+1; j < v.length; j++) {
							avgDist += v[i].distance(v[j], distFunction);
							n++;
						}
					}*/
				}
			}
			l = l.getNextLeaf();
		}

		double newThreshold = 0;
		if(n>0)
			newThreshold = avgDist/n;

		if(newThreshold <= currentThreshold) { // this guarantees that newThreshold always increases compared to currentThreshold
			newThreshold = 2*currentThreshold;
		}

		return newThreshold;
	}

	/**
	 * True if CFTree's memory occupation exceeds or is almost equal to the memory limit
	 *
	 * @param tree the tree to be tested
	 * @param limit the memory limit
	 * @return true if memory limit has been reached
	 */
	private boolean hasReachedMemoryLimit(CFTree tree, long limit) {
		long memory = computeMemorySize(tree);
//        System.out.println("MEMORY : " + memory + " / " + limit);
		if(memory >= limit) {
			return true;
		}

		return false;
	}

	/**
	 * Computes the memory usage of a CFTree
	 *
	 * @param t a CFTree
	 * @return memory usage in bytes
	 */
	private long computeMemorySize(CFTree t) {
		long memSize = 0;
		try {
			memSize = SizeOf.iterativeSizeOf(t);
		}
		catch(Exception e) {
			System.err.println("#################### ERROR WHEN COMPUTING MEMORY SIZE: " + e);
		}
		return memSize;
	}

	/**
	 *
	 * @return a list of subcluster, and for each subcluster a list of pattern vector indexes that belong to it
	 */
	public ArrayList<ArrayList<Integer>> getSubclusterMembers() {
		ArrayList<ArrayList<Integer>> membersList = new ArrayList<ArrayList<Integer>>();

		CFNode l = leafListStart.getNextLeaf(); // the first leaf is dummy!
		while(l!=null) {
			if(!l.isDummy()) {
				// System.out.println(l);
				for(CFEntry e : l.getEntries())
					membersList.add(e.getIndexList());
			}
			l = l.getNextLeaf();
		}

		return membersList;
	}

	/**
	 * Signals the fact that we finished inserting data.
	 * The obtained subclusters will be assigned a positive, unique ID number
	 */
	public void finishedInsertingData() {
		CFNode l = leafListStart.getNextLeaf(); // the first leaf is dummy!

		int id = 0;
		while(l!=null) {
			if(!l.isDummy()) {
				// System.out.println(l);
				for(CFEntry e : l.getEntries()) {
					id++;
					e.setSubclusterID(id);
				}
			}
			l = l.getNextLeaf();
		}
	}

	/**
	 * Retrieves the subcluster id of the closest leaf entry to e
	 *
	 * @return a positive integer, if the leaf entries were enumerated using finishedInsertingData(), otherwise -1
	 */
	public int mapToClosestSubcluster(double[] x) {
		CFEntry e = new CFEntry(x);

		return root.mapToClosestSubcluster(e);

	}

	/**
	 * Computes an estimate of the cost of running an O(n^2) algorithm to split each subcluster in more fine-grained clusters
	 *
	 * @return sqrt(sum_i[(n_i)^2]), where n_i is the number of members of the i-th subcluster
	 */
	public double computeSumLambdaSquared() {
		double lambdaSS = 0;

		CFNode l = leafListStart.getNextLeaf();
		while(l!=null) {
			if(!l.isDummy()) {
				for(CFEntry e : l.getEntries()) {
					lambdaSS += Math.pow(e.getIndexList().size(),2);
				}
			}
			l = l.getNextLeaf();
		}

		return Math.sqrt(lambdaSS);
	}

	/**
	 * prints the CFTree
	 */
	public void printCFTree() {
		System.out.println(root);
	}

	/**
	 * Counts the nodes of the tree (including leaves)
	 *
	 * @return the number of nodes in the tree
	 */
	public int countNodes() {
		int n = 1; // at least root has to be present
		n += root.countChildrenNodes();

		return n;
	}

	/**
	 * Counts the number of CFEntries in the tree
	 *
	 * @return the number of entries in the tree
	 */
	public int countEntries() {
		int n = root.size(); // at least root has to be present
		n += root.countEntriesInChildrenNodes();

		return n;
	}

	/**
	 * Counts the number of leaf entries (i.e., the number of sub-clusters in the tree)
	 *
	 * @return the number of leaf entries (i.e., the number of sub-clusters)
	 */
	public int countLeafEntries() {
		int i=0;
		CFNode l = leafListStart.getNextLeaf();
		while(l!=null) {
			if(!l.isDummy()) {
				i += l.size();
			}

			l = l.getNextLeaf();
		}

		return i;
	}

	/**
	 * Prints the index of all the pattern vectors that fall into the leaf nodes.
	 * This is only useful for debugging purposes.
	 */
	public void printLeafIndexes() {
		ArrayList<Integer> indexes = new ArrayList<Integer>();

		CFNode l = leafListStart.getNextLeaf();
		while(l!=null) {
			if(!l.isDummy()) {
				System.out.println(l);
				for(CFEntry e : l.getEntries())
					indexes.addAll(e.getIndexList());
			}
			l = l.getNextLeaf();
		}

		Integer[] v = indexes.toArray(new Integer[0]);
		Arrays.sort(v);
		System.out.println("Num of Indexes = " + v.length);
		System.out.println(Arrays.toString(v));
	}

	/**
	 * Prints the index of the pattern vectors in each leaf entry (i.e., each subcluster)
	 */
	public void printLeafEntries() {
		int i=0;
		CFNode l = leafListStart.getNextLeaf();
		while(l!=null) {
			if(!l.isDummy()) {
				for(CFEntry e : l.getEntries()) {
					System.out.println("[[" + (++i) +"]]");
					Integer[] v = e.getIndexList().toArray(new Integer[0]);
					Arrays.sort(v);
					System.out.println(Arrays.toString(v));
				}
			}
			
			l = l.getNextLeaf();
		}
	}

    private List<CFEntry> getClusters() {
        List<CFEntry> preLeaves = new ArrayList<CFEntry>();
        root.collectPreLeafEntries(preLeaves);
//        System.out.println("TOTAL NUMBER OF PRE LEAVES : " + preLeaves.size());
        return preLeaves;
    }

    public List<Pair<double[], double[]>> findPairs(double[] point, double treshold, double magic) {
        List<Pair<double[], double[]>> pairs = new ArrayList<Pair<double[], double[]>>();
        root.findPairs(point, treshold, magic, pairs);
        return pairs;
    }

	
}
